# Working directory containing the input text files.
# NOTE(review): a hard-coded absolute setwd() makes the script non-portable;
# prefer running R from the project directory or using relative paths.
setwd("C:\\Users\\Avell B153\\Desktop\\embrapa_TAGC_2013\\codigosR_TAGC")

# Read the longitudinal phenotypic data.
# (header=TRUE spelled out: `h=T` relied on partial argument matching and the
# reassignable T alias.)
fen <- read.table("fen.txt", header = TRUE)

library(nlme)

# Group the records by individual (id) so nlsList() fits one curve per id.
# Assumes fen has columns y, x and id — TODO confirm against fen.txt.
fen1 <- groupedData(y ~ x | id, fen)

# Step 1: fit a self-starting logistic curve, Asym/(1 + exp((xmid - x)/scal)),
# separately for every individual of the grouped data.
step1 <- nlsList(y ~ SSlogis(x, Asym, xmid, scal), fen1)

# Per-individual parameter estimates; rows of NA mark fits that did not converge.
step1_out <- coef(step1)
step1_out
dim(step1_out)

# Drop the non-converged individuals and record which ids remain.
step1_out1 <- na.omit(step1_out)
dim(step1_out1)
id_conv <- rownames(step1_out1)

# Assemble an id column plus the renamed parameters
# (fi1 = Asym, fi2 = xmid, fi3 = scal).
step1_out2 <- data.frame(id_conv, step1_out1)
colnames(step1_out2) <- c("id", "fi1", "fi2", "fi3")

# Symmetric difference of two vectors: the elements present in exactly one of
# x and y, returned sorted. Used below to find the non-converged ids.
outersect <- function(x, y) {
  sort(setdiff(union(x, y), intersect(x, y)))
}
# Ids whose logistic fit did NOT converge: everything in the full id list
# that is not among the converged ids.
id_total <- unique(fen[, 1])
id_nonconv <- data.frame(outersect(id_total, id_conv))
id_nonconv
# BUG FIX: length() on a data.frame returns the number of COLUMNS (always 1
# here); nrow() gives the intended count of non-converged individuals.
nrow(id_nonconv)
colnames(id_nonconv) <- c("id")

# Read the genotype data coded 0/1/2 (assumed: one row per id, one column per
# SNP, first column named "id" — TODO confirm against gen.txt).
gen <- read.table("gen.txt", header = TRUE)

# Training set: genotypes of the ids whose curve fit converged.
# (intersect("id","id") was a no-op — the merge key is simply "id".)
dados <- merge(step1_out2, gen, by = "id")
dados[1:20, 1:30]

# Validation set: genotypes of the ids that did not converge.
dados_val <- merge(id_nonconv, gen, by = "id")
dados_val[1:20, 1:30]

# Read the true (simulated) genetic values.
true <- read.table("true.txt", header = TRUE)

# True values for the converged ids only. merge() suffixes duplicated column
# names, which is why later code reads true1$fi1.y (fi1 from true.txt) —
# fi1.x is the estimate carried over from step1_out2.
true1 <- merge(step1_out2, true, by = "id")
dim(true1)
head(true1)

# True values for the non-converged (validation) ids only.
true2 <- merge(id_nonconv, true, by = "id")
dim(true2)
head(true2)

# Response vectors: the estimated logistic parameters, one value per training
# individual (column order fixed by the merge: id, fi1, fi2, fi3, SNPs...).
fi1 <- dados[[2]]  # asymptote estimates
fi2 <- dados[[3]]  # inflection-point estimates
fi3 <- dados[[4]]  # scale estimates

# Marker matrices: drop the id and the three parameter columns.
M <- dados[, -(1:4)]      # training genotypes
M_val <- dados_val[, -1]  # validation genotypes (id is the only extra column)

library(BGLR)
set.seed(12345678)  # fixed seed so the MCMC runs are reproducible

#### Bayesian ridge regression (BRR) ####

# Linear predictor: all markers with a common-variance Gaussian prior.
RR=list(list(X=M,model='BRR'))

# One BGLR run per logistic parameter (fi1 = Asym, fi2 = xmid, fi3 = scal).
RR_fi1=BGLR(y=fi1,ETA=RR,nIter=10000,burnIn=4000,thin=3)
RR_fi2=BGLR(y=fi2,ETA=RR,nIter=10000,burnIn=4000,thin=3)
RR_fi3=BGLR(y=fi3,ETA=RR,nIter=10000,burnIn=4000,thin=3)

# Fitted genomic values for the training individuals.
RR_u_fi1=RR_fi1$yHat
RR_u_fi2=RR_fi2$yHat
RR_u_fi3=RR_fi3$yHat
# Predicted trait value at time x = 600 by plugging the three predicted
# parameters back into the logistic curve.
RR_u_t600=RR_u_fi1/(1 + exp((RR_u_fi2 - 600)/RR_u_fi3))

# Accuracy: correlation with the true values (".y" suffix = column that came
# from true.txt in the true1 merge).
RR_ac_fi1=cor(true1$fi1.y,RR_u_fi1)
RR_ac_fi2=cor(true1$fi2.y,RR_u_fi2)
RR_ac_fi3=cor(true1$fi3.y,RR_u_fi3)
RR_ac_t600=cor(true1$t600,RR_u_t600)

result_RR=cbind(RR_ac_fi1,RR_ac_fi2,RR_ac_fi3,RR_ac_t600)
result_RR

# Validation: predict the non-converged ids from the estimated marker effects.
# NOTE(review): the intercept (mu) is omitted here; a constant shift does not
# change the correlations below, but these are not predictions on the y scale.
RR_u_val_fi1=as.matrix(M_val)%*%RR_fi1$ETA[[1]]$b
RR_u_val_fi2=as.matrix(M_val)%*%RR_fi2$ETA[[1]]$b
RR_u_val_fi3=as.matrix(M_val)%*%RR_fi3$ETA[[1]]$b
RR_u_val_t600=RR_u_val_fi1/(1 + exp((RR_u_val_fi2 - 600)/RR_u_val_fi3))

# Validation accuracy against the true values of the non-converged ids.
RR_val_fi1=cor(true2$fi1,RR_u_val_fi1)
RR_val_fi2=cor(true2$fi2,RR_u_val_fi2)
RR_val_fi3=cor(true2$fi3,RR_u_val_fi3)
RR_val_t600=cor(true2$t600,RR_u_val_t600)

result_RR_val=cbind(RR_val_fi1,RR_val_fi2,RR_val_fi3,RR_val_t600)
result_RR_val


#### BayesA ####
library(BGLR)
# BayesA: marker-specific effect variances (scaled-t prior on effects).
BA=list(list(X=M,model='BayesA'))

# Same protocol as the BRR section: one run per logistic parameter.
BA_fi1=BGLR(y=fi1,ETA=BA,nIter=10000,burnIn=4000,thin=3)
BA_fi2=BGLR(y=fi2,ETA=BA,nIter=10000,burnIn=4000,thin=3)
BA_fi3=BGLR(y=fi3,ETA=BA,nIter=10000,burnIn=4000,thin=3)

# Fitted genomic values for the training individuals.
BA_u_fi1=BA_fi1$yHat
BA_u_fi2=BA_fi2$yHat
BA_u_fi3=BA_fi3$yHat
# Predicted trait value at time x = 600 via the logistic curve.
BA_u_t600=BA_u_fi1/(1 + exp((BA_u_fi2 - 600)/BA_u_fi3))

# Training accuracy against the true values (".y" = column from true.txt).
BA_ac_fi1=cor(true1$fi1.y,BA_u_fi1)
BA_ac_fi2=cor(true1$fi2.y,BA_u_fi2)
BA_ac_fi3=cor(true1$fi3.y,BA_u_fi3)
BA_ac_t600=cor(true1$t600,BA_u_t600)

result_BA=cbind(BA_ac_fi1,BA_ac_fi2,BA_ac_fi3,BA_ac_t600)
result_BA

# Validation predictions from the estimated marker effects (intercept omitted;
# harmless for the correlations computed below).
BA_u_val_fi1=as.matrix(M_val)%*%BA_fi1$ETA[[1]]$b
BA_u_val_fi2=as.matrix(M_val)%*%BA_fi2$ETA[[1]]$b
BA_u_val_fi3=as.matrix(M_val)%*%BA_fi3$ETA[[1]]$b
BA_u_val_t600=BA_u_val_fi1/(1 + exp((BA_u_val_fi2 - 600)/BA_u_val_fi3))

# Validation accuracy for the non-converged ids.
BA_val_fi1=cor(true2$fi1,BA_u_val_fi1)
BA_val_fi2=cor(true2$fi2,BA_u_val_fi2)
BA_val_fi3=cor(true2$fi3,BA_u_val_fi3)
BA_val_t600=cor(true2$t600,BA_u_val_t600)

result_BA_val=cbind(BA_val_fi1,BA_val_fi2,BA_val_fi3,BA_val_t600)
result_BA_val

#### BayesB ####

# BayesB: variable selection prior.
# NOTE(review): probIn looks like the prior proportion of markers with
# non-null effects — confirm the intended value against the BGLR manual.
BB=list(list(X=M,model='BayesB',probIn=0.8))

# Same protocol as the previous sections: one run per logistic parameter.
BB_fi1=BGLR(y=fi1,ETA=BB,nIter=10000,burnIn=4000,thin=3)
BB_fi2=BGLR(y=fi2,ETA=BB,nIter=10000,burnIn=4000,thin=3)
BB_fi3=BGLR(y=fi3,ETA=BB,nIter=10000,burnIn=4000,thin=3)

# Fitted genomic values for the training individuals.
BB_u_fi1=BB_fi1$yHat
BB_u_fi2=BB_fi2$yHat
BB_u_fi3=BB_fi3$yHat
# Predicted trait value at time x = 600 via the logistic curve.
BB_u_t600=BB_u_fi1/(1 + exp((BB_u_fi2 - 600)/BB_u_fi3))

# Training accuracy against the true values (".y" = column from true.txt).
BB_ac_fi1=cor(true1$fi1.y,BB_u_fi1)
BB_ac_fi2=cor(true1$fi2.y,BB_u_fi2)
BB_ac_fi3=cor(true1$fi3.y,BB_u_fi3)
BB_ac_t600=cor(true1$t600,BB_u_t600)

result_BB=cbind(BB_ac_fi1,BB_ac_fi2,BB_ac_fi3,BB_ac_t600)
result_BB

# Validation predictions from the estimated marker effects (intercept omitted;
# harmless for the correlations computed below).
BB_u_val_fi1=as.matrix(M_val)%*%BB_fi1$ETA[[1]]$b
BB_u_val_fi2=as.matrix(M_val)%*%BB_fi2$ETA[[1]]$b
BB_u_val_fi3=as.matrix(M_val)%*%BB_fi3$ETA[[1]]$b
BB_u_val_t600=BB_u_val_fi1/(1 + exp((BB_u_val_fi2 - 600)/BB_u_val_fi3))

# Validation accuracy for the non-converged ids.
BB_val_fi1=cor(true2$fi1,BB_u_val_fi1)
BB_val_fi2=cor(true2$fi2,BB_u_val_fi2)
BB_val_fi3=cor(true2$fi3,BB_u_val_fi3)
BB_val_t600=cor(true2$t600,BB_u_val_t600)

result_BB_val=cbind(BB_val_fi1,BB_val_fi2,BB_val_fi3,BB_val_t600)
result_BB_val

###### Bayesian LASSO (BL) ######

# Bayesian LASSO: double-exponential prior on marker effects.
BL=list(list(X=M,model='BL'))

# Same protocol as the previous sections: one run per logistic parameter.
# The BL_* objects are also reused by the growth-curve section further below.
BL_fi1=BGLR(y=fi1,ETA=BL,nIter=10000,burnIn=4000,thin=3)
BL_fi2=BGLR(y=fi2,ETA=BL,nIter=10000,burnIn=4000,thin=3)
BL_fi3=BGLR(y=fi3,ETA=BL,nIter=10000,burnIn=4000,thin=3)

# Fitted genomic values for the training individuals.
BL_u_fi1=BL_fi1$yHat
BL_u_fi2=BL_fi2$yHat
BL_u_fi3=BL_fi3$yHat
# Predicted trait value at time x = 600 via the logistic curve.
BL_u_t600=BL_u_fi1/(1 + exp((BL_u_fi2 - 600)/BL_u_fi3))

# Training accuracy against the true values (".y" = column from true.txt).
BL_ac_fi1=cor(true1$fi1.y,BL_u_fi1)
BL_ac_fi2=cor(true1$fi2.y,BL_u_fi2)
BL_ac_fi3=cor(true1$fi3.y,BL_u_fi3)
BL_ac_t600=cor(true1$t600,BL_u_t600)

result_BL=cbind(BL_ac_fi1,BL_ac_fi2,BL_ac_fi3,BL_ac_t600)
result_BL

# Validation predictions from the estimated marker effects (intercept omitted;
# harmless for the correlations computed below).
BL_u_val_fi1=as.matrix(M_val)%*%BL_fi1$ETA[[1]]$b
BL_u_val_fi2=as.matrix(M_val)%*%BL_fi2$ETA[[1]]$b
BL_u_val_fi3=as.matrix(M_val)%*%BL_fi3$ETA[[1]]$b
BL_u_val_t600=BL_u_val_fi1/(1 + exp((BL_u_val_fi2 - 600)/BL_u_val_fi3))

# Validation accuracy for the non-converged ids.
BL_val_fi1=cor(true2$fi1,BL_u_val_fi1)
BL_val_fi2=cor(true2$fi2,BL_u_val_fi2)
BL_val_fi3=cor(true2$fi3,BL_u_val_fi3)
BL_val_t600=cor(true2$t600,BL_u_val_t600)

result_BL_val=cbind(BL_val_fi1,BL_val_fi2,BL_val_fi3,BL_val_t600)
result_BL_val


### Presenting the results as tables ###

# Use data.frame so the accuracy columns stay numeric: the original
# cbind(character, numeric) coerced every accuracy to character.
tabela_result <- data.frame(
  c("result_RR", "result_BA", "result_BB", "result_BL"),
  rbind(result_RR, result_BA, result_BB, result_BL)
)
colnames(tabela_result) <- c("metodo", "fi1", "fi2", "fi3", "t600")

tabela_valid <- data.frame(
  c("result_RR_val", "result_BA_val", "result_BB_val", "result_BL_val"),
  rbind(result_RR_val, result_BA_val, result_BB_val, result_BL_val)
)
colnames(tabela_valid) <- c("metodo", "fi1", "fi2", "fi3", "t600")


###### QTL identification: Bayesian LASSO ######

# SNP effects estimated via BL (assumed: one row per SNP, columns for the
# three parameters — TODO confirm against snp_ef.txt).
snp_ef <- read.table("snp_ef.txt", header = TRUE)
# Marker map; the mhtplot calls below take chromosome/position from columns
# 5 and 6 of the merged result.
map <- read.table("mapa.txt", header = TRUE)

# Join effects with map positions by SNP name
# (intersect("snp_names","snp_names") was a no-op).
snp_fim <- merge(snp_ef, map, by = "snp_names")
head(snp_fim)

library(gap)  # provides mhtplot() for Manhattan plots

### Manhattan plot for fi1 (asymptote) SNP effects

# Columns for mhtplot: chromosome (5), position (6), fi1 effect (2).
snp_fi1=snp_fim[,c(5,6,2)]
par(las=2, xpd=TRUE, cex.axis=0.9, cex=0.8)
# Alternating chromosome colors.
# NOTE(review): cbind() produces a 10x2 matrix rather than a color vector;
# c(rep(c("red","blue"),5)) was likely intended — confirm mhtplot tolerates it.
color = cbind(rep(c("red","blue"),5),c("blue"))

# abs() is applied to the whole 3-column frame; chromosome and position are
# positive so only the effect column is really affected.
mhtplot(abs(snp_fi1),mht.control(logscale=FALSE, colors=color,labels=seq(1,5),srt=0), ylab="SNP effects  BL fi1",pch=19)
 axis(2, at = seq(0, 20, by = 4))


### Manhattan plot for fi2 (inflection point) SNP effects

# Columns for mhtplot: chromosome (5), position (6), fi2 effect (3).
snp_fi2=snp_fim[,c(5,6,3)]
par(las=2, xpd=TRUE, cex.axis=0.9, cex=0.8)
# Alternating chromosome colors.
# NOTE(review): cbind() produces a 10x2 matrix rather than a color vector;
# c(rep(c("red","blue"),5)) was likely intended — confirm mhtplot tolerates it.
color = cbind(rep(c("red","blue"),5),c("blue"))

# abs() is applied to the whole 3-column frame; chromosome and position are
# positive so only the effect column is really affected.
mhtplot(abs(snp_fi2),mht.control(logscale=FALSE, colors=color,labels=seq(1,5),srt=0), ylab="SNP effects  BL fi2",pch=19)
 axis(2, at = seq(0, 20, by = 4))

### Manhattan plot for fi3 (scale) SNP effects

# Columns for mhtplot: chromosome (5), position (6), fi3 effect (4).
snp_fi3=snp_fim[,c(5,6,4)]
par(las=2, xpd=TRUE, cex.axis=0.9, cex=0.8)
# Alternating chromosome colors.
# NOTE(review): cbind() produces a 10x2 matrix rather than a color vector;
# c(rep(c("red","blue"),5)) was likely intended — confirm mhtplot tolerates it.
color = cbind(rep(c("red","blue"),5),c("blue"))

# abs() is applied to the whole 3-column frame; chromosome and position are
# positive so only the effect column is really affected.
mhtplot(abs(snp_fi3),mht.control(logscale=FALSE, colors=color,labels=seq(1,5),srt=0), ylab="SNP effects  BL fi3",pch=19)
 axis(2, at = seq(0, 20, by = 4))

######## Genomic growth curves ########

# GEBV at five time points (days), obtained by plugging the BL-predicted
# parameters into the logistic curve. Each vector has one value per training id.
y_gen0   <- BL_u_fi1/(1 + exp((BL_u_fi2 -   0)/BL_u_fi3))
y_gen132 <- BL_u_fi1/(1 + exp((BL_u_fi2 - 132)/BL_u_fi3))
y_gen265 <- BL_u_fi1/(1 + exp((BL_u_fi2 - 265)/BL_u_fi3))
y_gen397 <- BL_u_fi1/(1 + exp((BL_u_fi2 - 397)/BL_u_fi3))
y_gen530 <- BL_u_fi1/(1 + exp((BL_u_fi2 - 530)/BL_u_fi3))

# Long-format frame: 5 rows per individual, one per time point.
# BUG FIX: the original cbind() recycled the n-length y_gen vectors against a
# 5n-length id/x and then flattened a transposed matrix, producing 25n rows of
# which 20n were misaligned duplicates; it also hard-coded n = 881 and let
# data.frame(cbind(...)) coerce every column to character.
n_id <- nrow(dados)                         # number of training individuals
id <- rep(dados[, 1], each = 5)             # row order of dados (merge sorts by id)
x <- rep(c(0, 132, 265, 397, 530), n_id)

# Interleave the five per-time vectors: transpose the n_id x 5 matrix and
# flatten column-wise => (t0..t530) for id 1, then id 2, ...
GEBV <- as.vector(t(cbind(y_gen0, y_gen132, y_gen265, y_gen397, y_gen530)))

curva_fim <- data.frame(id = id, x = x, GEBV = GEBV)
colnames(curva_fim) <- c("id", "x", "GEBV")

#### Plot of the estimated genomic growth curves for the first 100 ids
library(lattice)

# 100 ids x 5 time points = 500 rows. Subset the data frame itself so GEBV,
# x and groups = id stay aligned: indexing inside the formula (the original
# GEBV[1:500] ~ x[1:500]) left the grouping vector at full length.
xyplot(GEBV ~ x, data = curva_fim[1:500, ], groups = id, type = "a",
       xlim = c(0, 530), xlab = "tempo (dias)", ylab = "Peso (kg)",
       auto.key = list(space = "right", points = FALSE, lines = TRUE))

##### GEBVs at the unobserved time x = 100 days

y_gen100 <- BL_u_fi1/(1 + exp((BL_u_fi2 - 100)/BL_u_fi3))

#### Estimating marker effects over time (least-squares back-solve) ####

library(MASS)

M <- as.matrix(M)

# The generalized inverse of M'M is the same for every time point, so compute
# it once: the original recomputed this expensive ginv() three times.
# crossprod(M) == t(M) %*% M but avoids forming the explicit transpose.
MtM_inv <- ginv(crossprod(M))

snp0 <- MtM_inv %*% crossprod(M, y_gen0)

snp100 <- MtM_inv %*% crossprod(M, y_gen100)  # x = 100 is an unobserved time

snp530 <- MtM_inv %*% crossprod(M, y_gen530)










